Fonctions, variables

source("/shared/ifbstor1/home/acolajanni/scripts/fun_interactions.r")

# LOADING DATA 
load("/shared/ifbstor1/home/acolajanni/data/phic/rdata/3div_0.05.Rdata")
data = File_list_filtered0.05
rm(File_list_filtered0.05)
data_05 = lapply(data, hic_format )
data_01 = lapply(data_05, function(x) filter_pvalue(df=x, seuil = 2))


heatmap_range = GRanges(
  seqnames = Rle("chr5"),
  IRanges(89768410-2e+6, 89777404+2e+6)
)

# bins of 8kb
binsize = 8000
# bins of 8kb for heatmap visualisations center around polr3g bait
borne_inf = seq(89768410-2e+6 , 89777404+2e+6, by = binsize)
borne_sup = seq(89768410-2e+6+binsize-1 , 89777404+2e+6+binsize-1, by = binsize)

# Subset to keep chr5 (faster computation)
chr5 = lapply(data_05, function(x) subset_chr(x, "chr5"))

# Creating the list of subsetted dataframe around the interval 
range_4MB = lapply(chr5, function(x) subset_by_interval(x,89768410-2e+6, 89777404+2e+6) )

# add empty interactions (diagonal) for heatmap visualisation
diagonal = data.frame("frag1"=paste0(borne_inf,"-",borne_sup), 
                      "frag2"=paste0(borne_inf,"-",borne_sup))

# Adding "diagonal" values in each dataframe of the list
range_4MB = lapply(range_4MB, function(x) rbind(dplyr::select(x,frag1,frag2), diagonal))


# transform it into a single dataframe
range_4MB = ldply(range_4MB)

# bins into dataframe for easier steps
bornes = data.frame("start"=borne_inf, "end"=borne_sup)

# correspondence between bins and frag1 and 2 chromosomal positions
include_in_bornes = get_position_inside_bins(range_4MB,bornes, 8000, "frag1")
include_in_bornes2 = get_position_inside_bins(range_4MB,bornes, 8000, "frag2")


# Merge all the bins interactions
tmp = merge(dplyr::select(range_4MB,frag1,frag2),include_in_bornes, by.x = "frag1", by.y = ".id") 
interactions = merge(tmp, include_in_bornes2, by.x = "frag2", by.y = ".id")


# Middle position of the bin
interactions = interactions %>% mutate(
  frag1 = (start.x + end.x )/ 2,
  frag2 = (start.y + end.y )/ 2
) %>% dplyr::select(frag1, frag2)


# Symetric matrix :
interactions_reverse = filter(interactions, frag1 != frag2)
interactions_reverse = interactions_reverse %>% mutate(
  frag1_r = frag2,
  frag2_r = frag1
  ) %>% dplyr::select(frag1_r, frag2_r)

colnames(interactions_reverse) = c("frag1","frag2")
interactions = rbind(interactions, interactions_reverse)

# To create square matrix
mat = as.matrix(xtabs(~frag1+frag2,interactions))

Heatmap d’interactions (p < 0.05)

pheatmap(log2(mat+1),
         cluster_cols = FALSE, 
         cluster_rows = FALSE,
         show_rownames = TRUE, 
         show_colnames = TRUE,
         scale = "none",
         #breaks = breaks,
         #inferno(8, direction = -1)
         brewer.pal(8, name = "YlOrRd"),
         fontsize = 12,
         main= "Contact heatmap center on POLR3G bait (87-91 Mbon chr5, p<0.05)")  

polr3g_exact = GRanges(
  seqnames = Rle("chr5"),
  IRanges(89768410, 89777404)
)

interaction_polr3g = 
  lapply(data_05, function(x) subset_by_overlap(ROI = polr3g_exact, df = x))

interaction_polr3g = ldply(interaction_polr3g)

ref = GRanges(
  unique(interaction_polr3g$interaction_polr3g),
  seqnames = Rle("chr5"))  

atrack <- AnnotationTrack(ref)
gtrack <- GenomeAxisTrack(IRanges(start = 89768410 - 2000000, end = 89777404 + 2000000))
itrack <- IdeogramTrack(genome="hg19", 
                        chromosome="chr5", 
                        from =89768410 - 2000000,
                        to=89777404 + 2000000)

ht = HighlightTrack(trackList = list(itrack, gtrack,atrack),
                    start = 89768410, end = 89777404, name = "POLR3G_Bait")

plotTracks(list(ht))

Avec p<0.01

# Subset to keep chr5 (faster computation)
chr5 = lapply(data_01, function(x) subset_chr(x, "chr5"))

# Creating the list of subsetted dataframe around the interval 
range_4MB = lapply(chr5, function(x) subset_by_interval(x,89768410-2e+6, 89777404+2e+6) )

# add empty interactions (diagonal) for heatmap visualisation
diagonal = data.frame("frag1"=paste0(borne_inf,"-",borne_sup), 
                      "frag2"=paste0(borne_inf,"-",borne_sup))

# Adding "diagonal" values in each dataframe of the list
range_4MB = lapply(range_4MB, function(x) rbind(dplyr::select(x,frag1,frag2), diagonal))


# transform it into a single dataframe
range_4MB = ldply(range_4MB)

# bins into dataframe for easier steps
bornes = data.frame("start"=borne_inf, "end"=borne_sup)

# correspondence between bins and frag1 and 2 chromosomal positions
include_in_bornes = get_position_inside_bins(range_4MB,bornes, 8000, "frag1")
include_in_bornes2 = get_position_inside_bins(range_4MB,bornes, 8000, "frag2")


# Merge all the bins interactions
tmp = merge(dplyr::select(range_4MB,frag1,frag2),include_in_bornes, by.x = "frag1", by.y = ".id") 
interactions = merge(tmp, include_in_bornes2, by.x = "frag2", by.y = ".id")


# Middle position of the bin
interactions = interactions %>% mutate(
  frag1 = (start.x + end.x )/ 2,
  frag2 = (start.y + end.y )/ 2
) %>% dplyr::select(frag1, frag2)


# Symetric matrix :
interactions_reverse = filter(interactions, frag1 != frag2)
interactions_reverse = interactions_reverse %>% mutate(
  frag1_r = frag2,
  frag2_r = frag1
  ) %>% dplyr::select(frag1_r, frag2_r)

colnames(interactions_reverse) = c("frag1","frag2")
interactions = rbind(interactions, interactions_reverse)

# To create square matrix
mat = as.matrix(xtabs(~frag1+frag2,interactions))
pheatmap(log2(mat+1),
         cluster_cols = FALSE, 
         cluster_rows = FALSE,
         show_rownames = TRUE, 
         show_colnames = TRUE,
         scale = "none",
         #breaks = breaks,
         #inferno(8, direction = -1)
         brewer.pal(8, name = "YlOrRd"),
         fontsize = 12,
         main= "Contact heatmap center on POLR3G bait (87-91 Mbon chr5, p<0.01)")  

load("/shared/ifbstor1/home/acolajanni/data/interactions_polr3g.Rdata")

bengi = GRanges(polr3g_BENGI$cCRE, seqnames = Rle("chr5"))
range_bengi = GRanges(
  seqnames = Rle("chr"),
  IRanges(polr3g_BENGI$cCRE)
)

polr3g_exact = GRanges(
  seqnames = Rle("chr5"),
  IRanges(89768410, 89777404)
)

interaction_polr3g = 
  lapply(data_01, function(x) subset_by_overlap(ROI = polr3g_exact, df = x))

interaction_polr3g = ldply(interaction_polr3g)

# colors scale
interactions = data.frame(table(interaction_polr3g$interaction_polr3g))
colors = viridis(max(interactions$Freq), 
                 direction = -1)[cut(interactions$Freq,max(interactions$Freq))] 

inf = seq(88000000,91000000,by = (91000000-88000000)/max(interactions$Freq))
sup = seq(88290000,91000000,by = (91000000-88000000)/max(interactions$Freq))
bornes = paste0(inf[1:10],"-",sup[1:10])
color_scale = GRanges(bornes, seqnames = Rle("chr5"))


ref = GRanges(
  unique(interaction_polr3g$interaction_polr3g),
  seqnames = Rle("chr5"))  

atrack <- AnnotationTrack(ref, name = "3div", fill = colors, col = NULL)
atrack_bengi = AnnotationTrack(bengi, name = "BENGI",col = NULL, fill = "black")
atrack_scale = AnnotationTrack(color_scale, col = NULL, 
                               fill =viridis(10, direction = -1),
                               showFeatureId=TRUE,
                               id = c(1:10)) 




gtrack <- GenomeAxisTrack(IRanges(start = 89768410 - 2000000, end = 89777404 + 2000000))
itrack <- IdeogramTrack(genome="hg19", 
                        chromosome="chr5", 
                        from =89768410 - 2000000,
                        to=89777404 + 2000000)

ht = HighlightTrack(trackList = list(atrack_scale, itrack, gtrack,atrack, atrack_bengi),
                    start = 89768410, end = 89777404, name = "POLR3G_Bait")

plotTracks(list(ht))
## Warning in seq_len(start(y)): first element used of 'length.out' argument
## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used

## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used

BENGI vs 3div (p < 0.05)

range_3div = GRanges(
  seqnames=Rle("chr5"),
  IRanges(polr3g_3div$interaction_polr3g)
)

range_bengi = GRanges(
  seqnames = Rle("chr"),
  IRanges(polr3g_BENGI$cCRE)
)

length(findOverlaps(range_3div, range_bengi))
## Warning in .Seqinfo.mergexy(x, y): The 2 combined objects have no sequence levels in common. (Use
##   suppressWarnings() to suppress this warning.)
## [1] 0
# colors scale
vir = viridis(15,direction = -1)
colors = vir[cut(polr3g_3div$Freq,15)] 

inf = seq(88000000,91000000,by = (91000000-88000000)/15)
sup = seq(88190000,91000000,by = (91000000-88000000)/15)
bornes = paste0(inf[1:15],"-",sup[1:15])
color_scale = GRanges(bornes, seqnames = Rle("chr5"))



bengi = GRanges(polr3g_BENGI$cCRE, seqnames = Rle("chr5"))

div3 = GRanges(
  polr3g_3div$interaction_polr3g,
  seqnames = Rle("chr5"))  

color_scale = GRanges(bornes, seqnames = Rle("chr5"))

atrack_3div <- AnnotationTrack(div3, name = "3div", fill = colors, col = NULL)
atrack_bengi = AnnotationTrack(bengi, name = "BENGI",col = NULL, fill = "black")
atrack_scale = AnnotationTrack(color_scale, col = NULL, 
                               fill =viridis(15, direction = -1),
                               showFeatureId=TRUE,
                               id = c(1:15)) 
gtrack <- GenomeAxisTrack(IRanges(start = 89768410 - 2000000, end = 89777404 + 2000000), chromosome = "chr5")



itrack <- IdeogramTrack(genome="hg19", 
                        chromosome="chr5", # specify chromosome in ucsc naming
                        from =89768410 - 2000000,
                        to=89777404 + 2000000)

ht = HighlightTrack(trackList = list(atrack_scale, itrack, gtrack,atrack_3div, atrack_bengi),
                    start = 89768410, end = 89777404, name = "POLR3G_Bait")
plotTracks(list(ht),background.title = "darkblue")
## Warning in seq_len(start(y)): first element used of 'length.out' argument
## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used

## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used